 
;;########################################################################
;; misdsd.lsp
;; Copyright (c) 1998 by Pedro Valero (valerop@uv.es)
;; Functions for missing data

;;########################################################################
 

(defun non-missing (var) 
  "Args: VAR. Takes a var with missing values and gives the var without them "
  (select var (which (map-elements 'not (map-elements 'equal nil var)))))

(defun id-missing (var) 
  "Args: VAR. Takes a var with missing values and gives the position of them"
  (which  (map-elements 'equal nil var)))

(defun id-non-missing (var)
  "Args: VAR. Takes a var with missing values and gives the position of the non-missing"
  (which (map-elements 'not (map-elements 'equal nil var))))

(defun list-missing (data)
  "Args: DATA. Returns a list of lists of positions of missing data in DATA"
  
  (map-elements #'id-missing (column-list data))
  )

;;Checks if a var has variance

(defun variancep (var)
  "Checks if a var has variance. Returns nil if has not and the sum of squares otherwise"
  (let (
        (sum-c (sum (** (- var (mean var)) 2)))
        )
    (if (= sum-c 0)
        nil
        sum-c)))

;; This function is 30 times faster than Forrest's. I have overrided Forrest's in the generic file.

#|(defun my-remove-missing-data-rows (matrix &key labels)
     (let*((LM (REMOVE-DUPLICATES (COMBINE (LIST-MISSING MATRIX))))
           (MISSING-rows (SET-DIFFERENCE (ISEQ (ARRAY-DIMENSION MATRIX 0)) LM)))
       (if labels
           (list (select matrix missing-rows (iseq (array-dimension matrix 1)))
                 (select labels missing-rows))
           (select matrix missing-rows (iseq (array-dimension matrix 1))))))|#

;;Checks if all rows in a matrix have missing values

(DEFUN ALL-ROWS-MISSING-p (MATRIX) 
     (LET* ((LM (REMOVE-DUPLICATES (COMBINE (LIST-MISSING MATRIX))))
            (ALL-MISSING (SET-DIFFERENCE (ISEQ (ARRAY-DIMENSION MATRIX 0)) LM))) 
       (NULL ALL-MISSING)))

;;Computes Listwise correlations and returns them as list

(defun listwisecorrelations (listwisedata)
;<<<<<<< MISSD4.LSP
  "Args. LISTWISEDATA. Data without missing. "
  (let (
    (res)
        )
  (cond (
         (= (array-dimension listwisedata 0) 0)
         (setf res (identity-matrix (array-dimension listwisedata 1))))
         (
          (> (array-dimension listwisedata 0) 0) 
          (let* (
                 (n (array-dimension listwisedata 0))
                 (dim (array-dimension listwisedata 1))
                 (vars-with-variance (which (mapcar #'variancep (column-list listwisedata))))
                 (vars-without-variance (set-difference (iseq (length vars-with-variance))
                                                        vars-with-variance))
                 (matrix-res (make-array (list dim dim) :initial-element '0 ))
                 )
            (cond 
              (
               (equal (remove-duplicates vars-with-variance) 'nil)
               (identity-matrix n))
              (t 
               (setf (select matrix-res vars-with-variance vars-with-variance)
                     (covariance-matrix 
                      (normalize 
                       (select listwisedata 
                               (iseq n) 
                               vars-with-variance))))
               ))
            (setf res matrix-res)))
         )
    res))
#|=======
  "Args. LISTWISEDATA. Data without missing. "
  (let* (
         (n (array-dimension listwisedata 0))
         (dim (array-dimension listwisedata 1))
         (vars-with-variance (which (mapcar #'variancep (column-list listwisedata))))
         (vars-without-variance (set-difference (iseq (length vars-with-variance))
                                      vars-with-variance))
         (matrix-res (make-array (list dim dim) :initial-element '0 ))
         )
    (cond 
      (
       (equal (remove-duplicates vars-with-variance) 'nil)
       (identity-matrix n))
      (t 
       (setf (select matrix-res vars-with-variance vars-with-variance)
             (covariance-matrix 
              (normalize 
               (select listwisedata 
                       (iseq n) 
                       vars-with-variance))))
       ))
         matrix-res))
>>>>>>> 1.2|#


; Computes Pairwise correlations and other information

(defun pairwisecorrelations (data missing-by-var)
  "Args. DATA. Data with missing data. Missing-by-var. A list of lists of missing data in each variable. Returns
a list with four lists: pairwise correlations, n pairwise y n by variable"
  (let* (;(corrpairwise nil)
         (data data)
         (missing-by-var missing-by-var)
         (i nil)
         (j nil)
         (h nil)
         (pairw nil)
         (n1 nil)
         (n2 nil)
         (secuencia nil)
         (paired-list nil)
         (cases (select (array-dimensions data ) 0))
         (nvars (select (array-dimensions data ) 1))
         (index-list (iseq cases))
         (corrpairwise-matrix (identity-matrix nvars))
         (npairwise-matrix (make-array (list nvars nvars)))
         (n1-matrix (make-array (list nvars nvars)))
         (n2-matrix (make-array (list nvars nvars)))
         (temp nil)
         )
    
    (dotimes (i nvars)
             (dotimes (j i)
                      (setf paired-list  
                            (union (select missing-by-var i)
                                   (select missing-by-var j)))
                      (setf secuencia (set-difference index-list
                                                      paired-list))
                     (cond
                       (
                        (= j i)
                        (setf (select corrpairwise-matrix i j) 1)
                        (setf (select corrpairwise-matrix j i) 1)
                        )
                        
                       (
                        (and 
                         (variancep (select data secuencia i))
                         (variancep (select data secuencia j)))
                        (setf temp (covariance-matrix 
                                    (select data secuencia (list i j))))
                        (setf (select corrpairwise-matrix j i)
                              (/ (select temp 0 1) 
                                 (* (sqrt (select temp 0 0)) 
                                    (sqrt (select temp 1 1)))))
                        (setf (select corrpairwise-matrix i j)
                              (select corrpairwise-matrix j i))
                        )
                       (t 
                        (setf (select corrpairwise-matrix i j) 0)
                       )
                       )
                      (setf (select npairwise-matrix i j) (length secuencia))
                      (setf (select n1-matrix i j)  (- cases (length 
                                               (select missing-by-var i))))
                      (setf (select n2-matrix i j)  (- cases (length 
                                               (select missing-by-var j))))
                      ))
    (list 
     corrpairwise-matrix
          (select (combine npairwise-matrix) (which (combine npairwise-matrix))) 
          (select (combine n1-matrix) (which (combine n1-matrix)))
          (select (combine n2-matrix) (which (combine n2-matrix))))
  ))


(defun pairwisecovariances (data missing-by-var)
  "Args. DATA. Data with missing data. Missing-by-var. A list of lists of missing data in each variable. Returns
a list with four lists: pairwise covariances, n pairwise y n by variable"
  (let* (;(corrpairwise nil)
         (data data)
         (missing-by-var missing-by-var)
         (i nil)
         (j nil)
         (h nil)
     
         (pairw nil)
         ;(x nil)
         ;(y nil)
        ; (zx nil)
         ;(zy nil)
         ;(npairwise nil)
         (n1 nil)
         (n2 nil)
         (secuencia nil)
         (paired-list nil)
         (cases (select (array-dimensions data ) 0))
         (nvars (select (array-dimensions data ) 1))
         (index-list (iseq 0 (-  cases 1)))
         (corrpairwise-matrix (make-array (list nvars nvars)))
         (npairwise-matrix (make-array (list nvars nvars)))
         (n1-matrix (make-array (list nvars nvars)))
         (n2-matrix (make-array (list nvars nvars)))
         (temp nil)
         )
 
    (dotimes (i nvars)
             (dotimes (j nvars)
                      (setf paired-list  
                                         (union (select missing-by-var i)
                                                 (select missing-by-var j)))
                      (setf secuencia (set-difference index-list
                                        paired-list))
                     
                     (cond
                       (
                        (and 
                         (variancep (select data secuencia i))
                         (variancep (select data secuencia j)))
                        (setf (select corrpairwise-matrix i j)
                              (covariance-matrix 
                               (select data secuencia (list i j))))
                        )
                       (t 
                        (setf (select corrpairwise-matrix i j) 0))
                       )
                      (setf (select npairwise-matrix i j) (length secuencia))
                      (setf (select n1-matrix i j)  
                            (- cases (length 
                                      (select missing-by-var i))))
                      (setf (select n2-matrix i j)  
                            (- cases (length 
                                      (select missing-by-var j))))
                     
                      ))
    
    (list corrpairwise-matrix 
          (select (combine npairwise-matrix) (which (combine npairwise-matrix))) 
          (select (combine n1-matrix) (which (combine n1-matrix)))
          (select (combine n2-matrix) (which (combine n2-matrix)))
          )
    )
  )


(defun missing-in-plot (plot-imputed-pairwise missing-by-var var0 var1)
  (let* (
        (plot-imputed-pairwise plot-imputed-pairwise)
        (missing-by-var missing-by-var)
        (var0 var0)
        (var1 var1)        
        )
    (if  (set-difference (select missing-by-var var0) 
                                       (select missing-by-var var1))        
         (send plot-imputed-pairwise 
               :point-symbol (set-difference (select missing-by-var var0) 
                                             (select missing-by-var var1)) 'X))
    (if  (set-difference (select missing-by-var var1) 
                         (select missing-by-var var0))        
         (send plot-imputed-pairwise 
               :point-symbol (set-difference (select missing-by-var var1) 
                                             (select missing-by-var var0)) 'cross))
    (if (intersection (select missing-by-var var1) 
                      (select missing-by-var var0))
        (send plot-imputed-pairwise 
              :point-symbol (intersection (select missing-by-var var1) 
                                          (select missing-by-var var0)) 'diamond))


    (if (non-missing (remove-duplicates (combine missing-by-var)))     
        (send plot-imputed-pairwise 
              :point-color (non-missing (remove-duplicates (combine missing-by-var))) 'green))


    (if (remove-duplicates (append (select missing-by-var var0) 
                                       (select missing-by-var var1)))       
        (send plot-imputed-pairwise 
              :point-color (remove-duplicates (append (select missing-by-var var0) 
                                                      (select missing-by-var var1))) 'RED))
              

   
    
   
    (send plot-imputed-pairwise :use-color t)
    (send plot-imputed-pairwise :redraw)
    
    
    
    ))



(defun missing-in-plot-uniwise (box-imputed missing-by-var var0)
  (let (
        (box-imputed box-imputed)
        (missing-by-var missing-by-var)
        (var0 var0)
        
        )



    (if (non-missing (remove-duplicates (combine missing-by-var)))     
        (send box-imputed 
              :point-color (non-missing (remove-duplicates (combine missing-by-var))) 'green))


    (if (select missing-by-var var0)
                                             
        (send box-imputed 
              :point-color (select missing-by-var var0) 
                                                       'RED))
              

    
   
    (send box-imputed :use-color t)
    (send box-imputed :redraw)
    
    
    
    ))

;; Computes a list with the variables correlated. This is to be used by the 
;;names-list box

(defun List-variables-correlated (variables)
"Args:VARIABLES: A list of names of variables. Returns a list with the combinations of variables, and two lists with the index of variables in the original list to facilitate the location of original variables."
  (let* ((varcorrelated nil)
         (xcorrelated nil)
         (ycorrelated nil)
         (i nil)
         (j nil)
         (variables variables)
         (nvariables (length variables))
         )

    (dotimes (i nvariables)
             (dotimes
              (j i)
              (setf varcorrelated (append varcorrelated 
                                          (list (concatenate 'string 
                                                             (select variables i)"-" (select variables j)))))
              (setf xcorrelated (append xcorrelated (list  j)))
              (setf ycorrelated (append ycorrelated (list  i))) ;esto genera las dos listas de indices para luego recuperar mas facilmente las variables originales
              ))
    (list varcorrelated xcorrelated ycorrelated))
  )


(defun cols-to-update (matrix-comparecorr selection)
  
  (let*
    (
     (matrix-comparecorr matrix-comparecorr)
     (y (select (send matrix-comparecorr :slot-value 'y-variable) 
              (first selection)))
     (x (select (send matrix-comparecorr :slot-value 'x-variable) 
              (first selection)))
     )
    (list x y)))

(defun listwise-missing-in-plot (plot-imputed-listwise missing-by-var var0 var1)
  (let* 
    (
    (plot-imputed-listwise plot-imputed-listwise)
    (missing-by-var missing-by-var)
     (var0 var0)
     (var1 var1)
    )
    
    (if (non-missing (remove-duplicates (combine missing-by-var)))     
         (send plot-imputed-listwise 
          :point-color (non-missing (remove-duplicates (combine missing-by-var))) 'green))
   
        
    (send plot-imputed-listwise :use-color t)
    (send plot-imputed-listwise :redraw)
    )
  )

;;************************************************************************
;; normal probability plot method taken from Forrest Young
;;modificado para que no destruya el orden de los datos
;;************************************************************************

(defun npplot (data &rest args)
  (apply #'normal-probability-plot data args))

(defun normal-probability-plot (data &key (title "Imputed Data Leverages")
                                  point-labels
                                  (variable-label "Leverages")
                                  location
                                  size
                                  (show nil))
"Args: (data &key (title Normal Probability Plot) point-labels
       (variable-label Value) location size (show t))
Makes a normal probability plot for DATA, using title, point-labels and
variable-label as supplied."
     (let* ((seq (rank data))
            (x (/ (- seq (mean seq)) (standard-deviation seq)))
            
            ;(ab (send (regression-model x y :print nil) :coef-estimates))
            (npplot (plot-points x data
                     :title title
                     :variable-labels (list "Normalized Data" variable-label)
                     :point-labels point-labels
                     :location location
                     :size size
                     :show nil)))
       ;(send npplot :abline (first ab) (second ab))
       ;(send npplot :add-lines x y)
       ;(when show (send npplot :show-window))
       npplot))




(defun lines-by-color (plot-imputed-pairwise missing-by-var var0 var1 x y)
  (let* (
        (plot-imputed-pairwise plot-imputed-pairwise)
        (missing-by-var missing-by-var)
        (var0 var0)
        (var1 var1)
         (x x)
         (y y)
         (n (length x))
         (black-green (set-difference 
                       (iseq n) (remove-duplicates 
                                 (append (select missing-by-var var0) 
                                         (select missing-by-var var1)))))
         (black (set-difference 
                       (iseq n) (remove-duplicates 
                                 (combine missing-by-var))))
        (ab-black (if black 
                      (send (regression-model (select x black) 
                                          (select y black):print nil) :coef-estimates)
                      (list 0 0)))
        (ab-green (send (regression-model (select x black-green) 
                                          (select y black-green) :print nil) :coef-estimates))
        (ab-red (send (regression-model x y :print nil) :coef-estimates))
        )
              

    
   
    
    
    (send plot-imputed-pairwise :use-color t )
    
    
;<<<<<<< MISSD4.LSP
    (send plot-imputed-pairwise :draw-color 'blue )
    (when black (send plot-imputed-pairwise :abline (first ab-black) (second ab-black) ))
;=======
;    (send plot-imputed-pairwise :draw-color 'blue )
;    (send plot-imputed-pairwise :abline (first ab-black) (second ab-black) )
;>>>>>>> 1.2
    (send plot-imputed-pairwise :draw-color 'green )
    (send plot-imputed-pairwise :abline (first ab-green) (second ab-green) )
    (send plot-imputed-pairwise :draw-color 'red )
    (send plot-imputed-pairwise :abline (first ab-red) (second ab-red) )
    (send plot-imputed-pairwise :draw-color 'black )
    
    
    ))
    
;<<<<<<< MISSD4.LSP
;=======
(defun compute-mean-c-error (parametters s rows-in-patterns data r p patterns)
"Computes EMC of the predicted values for observed values in data using the parametters estimated by EM. Useful to add a random component to imputed data smaller than using the variance of the variable.
	Args:
	parametters parametters estimated by EM
	s	number of patterns
	rows-in-patterns rows in each pattern of missing data
	data	original data
	r r
	p number of variables
	patterns patterns of missing data"
  (let* (
         (parametters parametters)
         (s s)
         (rows-in-patterns rows-in-patterns)
         (data data)
         (r r)
         (predicted (make-array (array-dimensions data)))
         (p p)
         (patterns patterns)
         (observed-in-missing-pattern-list (observed-in-missing-pattern-list patterns))
         )
    (dotimes (i s) 
             (dotimes (j p)
                      (if (and (equalp (aref r i j) 1) 
                               (> (aref parametters (+ 1 j) (+ 1 j)) 0))
                          (setf parametters   (select (schafer-sweep-operator 
                                                       parametters (list (+ 1 j))) 0))
                          )                           
                      (if  (and (equalp (aref r i j) 0) 
                                (< (aref parametters (+ 1 j) (+ 1 j)) 0))                              
                           (setf parametters 
                                 (select  
                                  (reverse-schafer-sweep-operator 
                                   parametters (list (+ 1 j))) 
                                  0))
                           ))
             
                     (dolist (n (select observed-in-missing-pattern-list  i))
                             (setf parametters 
                                   (select  
                                    (reverse-schafer-sweep-operator 
                                     parametters (list (+ 1 n))) 
                                    0))
                             (dolist (m (select rows-in-patterns i))
                                     (setf var-predictor (set-difference (select observed-in-missing-pattern-list  i) (list n)))
                                     (if var-predictor
                                         (setf (select predicted m n)
                                               (sum (select parametters 0 (+ 1 n))
                                                    (* (select parametters (+ 1 n) 
                                                               (+ 1 var-predictor)) 
                                                       (select data m 
                                                               var-predictor))))
                                 (setf (select predicted m n) (select parametters 0 (+ 1 n))))
                                     )
                             (setf parametters   (select (schafer-sweep-operator 
                                                          parametters (list (+ 1 n))) 0))
                             
                     )
             )
       
    (setf sum-square (map-elements #'reduce #'+
                                   (map-elements #'** 
                                     (- 
                                      (map-elements #'non-missing (column-list 
                                                                   data))
                                      (map-elements #'non-missing (column-list predicted))) 2)))
    
    (setf df (- (map-elements #'length (map-elements #'non-missing (column-list data))) p))
    (setf mean-c-error (sqrt (/ sum-square df)))
   
    mean-c-error)
  )


             

(defun cov-to-corr (matriz)

  (let* (
         (matriz matriz)
         (matriz-output (make-array (array-dimensions matriz)))
         )
    (dotimes (i (array-dimension matriz 0))
             (dotimes (j (array-dimension matriz 0)) 
                      (setf (select matriz-output i j) 
                            (/ (select matriz i j) 
                               (sqrt 
                                (* (select matriz i i) 
                                   (select matriz j j)))))
                      ))
    matriz-output)
  )


(defun corr-to-cov (matriz variances)

  (let* (
         (matriz matriz)
         (matriz-output (make-array (array-dimensions matriz)))
         (variances variances)
         )
    (dotimes (i (array-dimension matriz 0))
             (dotimes (j (array-dimension matriz 0)) 
                      (setf (select matriz-output i j) 
                            (* (select matriz i j) 
                               (sqrt 
                                (* (select variances  i) 
                                   (select variances  j)))))
                      ))
    matriz-output)
  )
#|
(defun corr-to-cov (matriz variances)

  (let* (
         (matriz matriz)
         (matriz-output (make-array (array-dimensions matriz)))
         (variances variances)
         )
    (dotimes (i (array-dimension matriz 0))
             (dotimes (j (array-dimension matriz 0)) 
                      (setf (select matriz-output i j) 
                            (* (select matriz i j) 
                               (sqrt 
                                (* (select variances  i) 
                                   (select variances  j)))))
                      ))
    matriz-output)
  )
